DS002R Final Presentation
Goal: To analyze traffic stop patterns across three U.S. jurisdictions using the Stanford Open Policing Project (SOPP).
Scope
Methodology
CASE WHEN, UNION, filtering).
Key Findings
# Convert any 64-bit integer columns to plain R integers.
# NOTE(review): presumably `ny_stops` arrives from the SQL server with
# integer64 counts - confirm against the query that creates it.
ny_stops_clean <- mutate(
  ny_stops,
  across(where(bit64::is.integer64), \(col) as.integer(col))
)

# Express each row's stop count as a share of all stops in the same year.
ny_stops_pct <- ny_stops_clean |>
  group_by(year) |>
  mutate(total_stops_year = sum(n_stops)) |>
  mutate(pct = n_stops / total_stops_year) |>
  ungroup()
# Line chart of the yearly share of stops, one coloured line per `race` value.
ggplot(ny_stops_pct, aes(x = year, y = pct, color = race)) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 3) +
  # `pct` is a proportion, so label the axis as whole-number percentages.
  scale_y_continuous(labels = percent_format(accuracy = 1)) +
  labs(
    title = "NY Traffic Stops by Race (2010-2018)",
    x = "Year",
    y = "Percent of Stops",
    color = "Race"
  ) +
  theme_minimal(base_size = 16) +
  theme(legend.position = "bottom")

# Reporting differences
Data note: The “NA/Unknown” category likely mixes bikes, scooters, or other missing labels and should be treated as a reporting inconsistency rather than a real stop type.
:::
# Prepare the stop-type counts for plotting: normalise integer columns, give
# missing or empty stop types an explicit label, and turn `stop_type` into a
# factor ordered by `type_count`.
combined_types <- ca_stops_combined |>
  mutate(
    across(where(bit64::is.integer64), \(col) as.integer(col)),
    # Keep real labels; everything missing or blank becomes "NA/Unknown".
    stop_type = ifelse(
      !is.na(stop_type) & stop_type != "",
      stop_type,
      "NA/Unknown"
    )
  ) |>
  # NOTE(review): reorder() runs once per city here, so the final level order
  # depends on how the per-city factors are combined - confirm this is the
  # ordering the chart is meant to show.
  group_by(city) |>
  mutate(stop_type = reorder(stop_type, type_count)) |>
  ungroup()
# Horizontal bar chart of `type_count` per `stop_type`, one panel per city.
ggplot(combined_types, aes(x = stop_type, y = type_count)) +
  geom_col(fill = "#4C72B0") +
  # Flip so the stop-type labels read horizontally.
  coord_flip() +
  scale_y_continuous(labels = comma) +
  # NOTE(review): combined with coord_flip(), confirm which displayed axis
  # `scales = "free_x"` actually frees in the installed ggplot2 version.
  facet_wrap(~ city, nrow = 1, scales = "free_x") +
  labs(
    title = "Stop Types: Long Beach vs. San Bernardino",
    x = "Stop Type",
    y = "Count"
  ) +
  theme_minimal(base_size = 16)

# Search vs. frisk
-- Per-race stop totals with search and frisk counts and rates.
-- Rows whose race is NULL, empty, 'other' or 'unknown' share one bucket.
-- NOTE(review): presumably this result is what the R code below reads as
-- `fl_search` - confirm against the chunk options.
SELECT
  CASE
    WHEN subject_race IS NULL OR subject_race IN ('other', 'unknown', '') THEN 'unknown/other'
    ELSE subject_race
  END AS race,
  COUNT(*) AS total_stops,
  SUM(CASE WHEN search_conducted = 1 THEN 1 ELSE 0 END) AS num_searches,
  SUM(CASE WHEN frisk_performed = 1 THEN 1 ELSE 0 END) AS num_frisk,
  -- Multiply by 1.0 so the division is not done in integer arithmetic.
  1.0 * SUM(CASE WHEN search_conducted = 1 THEN 1 ELSE 0 END) / COUNT(*) AS search_rate,
  1.0 * SUM(CASE WHEN frisk_performed = 1 THEN 1 ELSE 0 END) / COUNT(*) AS frisk_rate
FROM fl_statewide_2020_04_01
GROUP BY race;

# Reshape the two rate columns into long form: one row per race and metric,
# with display-ready metric names.
fl_search_long <- fl_search |>
  mutate(across(where(bit64::is.integer64), as.integer)) |>
  select(race, search_rate, frisk_rate) |>
  mutate(race = factor(race)) |>
  pivot_longer(
    cols = c(search_rate, frisk_rate),
    names_to = "metric",
    values_to = "rate"
  ) |>
  mutate(
    metric = recode(
      metric,
      "search_rate" = "Search Rate",
      "frisk_rate" = "Frisk Rate"
    )
  )
# Horizontal bars of `rate` per race, with one panel per metric.
ggplot(fl_search_long, aes(x = race, y = rate, fill = race)) +
  geom_col() +
  # NOTE(review): combined with coord_flip(), confirm which displayed axis
  # `scales = "free_x"` actually frees in the installed ggplot2 version.
  facet_wrap(~ metric, ncol = 2, scales = "free_x") +
  coord_flip() +
  scale_y_continuous(labels = percent_format(accuracy = 0.1)) +
  labs(
    title = "Search & Frisk Rates (Florida)",
    x = "Race",
    y = "Rate"
  ) +
  theme_minimal(base_size = 16) +
  # The axis already names each race, so the fill legend is redundant.
  theme(legend.position = "none")

# Data Sources
Pierson, E., Simoiu, C., Overgoor, J., Corbett-Davies, S., Jenson, D., Shoemaker, A., Ramachandran, V., et al. (2020). “A Large-Scale Analysis of Racial Disparities in Police Stops Across the United States.” Nature Human Behaviour, 1–10.
Stanford Open Policing Project (SOPP) Data. Traffic stop data compiled by the Stanford Open Policing Project and accessed through the Pomona College SQL server. Original dataset and documentation available at: https://openpolicing.stanford.edu. Pierson et al. (2020).